import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.downloader import Downloader
import os
import gradio as gr
 
# Configure a local NLTK data directory so resources ship alongside the app.
data_path = './nltk_data'
os.makedirs(data_path, exist_ok=True)
nltk.data.path.append(data_path)

# Required resource packages, mapped to the on-disk subdirectory NLTK
# unpacks them into. Each is downloaded only if not already present,
# so repeated startups skip the network round-trip.
_REQUIRED_RESOURCES = [
    ('punkt_tab', os.path.join('tokenizers', 'punkt_tab')),
    ('averaged_perceptron_tagger_eng', os.path.join('taggers', 'averaged_perceptron_tagger_eng')),
]

# One shared downloader instead of constructing a new one per resource.
downloader = Downloader(download_dir=data_path)
for _resource_id, _subdir in _REQUIRED_RESOURCES:
    if not os.path.exists(os.path.join(data_path, _subdir)):
        downloader.download(_resource_id)
 
def process_text(input_text):
    """Tokenize *input_text* and tag each token with its part of speech.

    Returns a single string of space-separated ``word/TAG`` pairs, e.g.
    ``"This/DT is/VBZ ..."``.
    """
    # Tokenize and POS-tag in one pass.
    tagged_pairs = pos_tag(word_tokenize(input_text))

    # Render each (word, tag) pair as "word/tag" and join with spaces.
    rendered = []
    for word, tag in tagged_pairs:
        rendered.append(f"{word}/{tag}")
    return " ".join(rendered)
 
# Build the Gradio interface: plain text in, tagged text out.
demo_examples = ["This is an example sentence."]
iface = gr.Interface(
    fn=process_text,
    title="NLTK 分词和词性标注",
    description="输入一段英文文本，返回分词和词性标注的结果。",
    inputs="text",
    outputs="text",
    examples=demo_examples,
)

# Serve the app on all network interfaces at port 7862.
iface.launch(server_name="0.0.0.0", server_port=7862)